from bs4 import BeautifulSoup
import requests
from urllib.request import urlretrieve
import os
from docx import Document
from docx.oxml.ns import qn
from docx.enum.text import WD_ALIGN_PARAGRAPH
doc = Document()  # Create the document object from the default template.

# Keep prompting until the address is an http(s) URL. A single `if` would
# re-prompt only once, letting a second bad entry reach requests.get(), and a
# substring test for 'http' would accept text that merely contains it.
html_page_adress = input("请输入推文地址:").strip()
while not html_page_adress.lower().startswith(("http://", "https://")):
    print("网址输入错误！")
    html_page_adress = input("请输入推文地址:").strip()

# The timeout prevents the script from hanging on an unresponsive server, and
# raise_for_status() stops early on an HTTP error instead of parsing an error
# page that contains none of the expected nodes.
html = requests.get(html_page_adress, timeout=30)
html.raise_for_status()

# Decode once and reuse the text for both the debug dump and the parser.
# errors="replace" keeps a stray invalid byte from aborting the whole run.
page_source = html.content.decode("utf-8", errors="replace")
print(page_source)
# NOTE(review): 'html' lets BeautifulSoup pick whichever HTML parser is
# installed; name one explicitly if results must be identical across machines.
soup = BeautifulSoup(page_source, 'html')
i = 1  # Sequence number of the next image file; advanced by urllib_download().
def urllib_download(IMAGE_URL,path):
    """Download one image into ``<path>/img/`` under the next sequence number.

    The file name comes from the module-level counter ``i`` and always gets a
    ``.png`` suffix. The counter is advanced only after ``urlretrieve``
    returns, so a failed download does not consume a number.

    Args:
        IMAGE_URL: Address of the image to fetch.
        path: Folder (as a string) that already contains an ``img`` subfolder.
    """
    global i
    target_file = path + f"/img/{i}.png"
    urlretrieve(IMAGE_URL, target_file)
    i += 1
# Locate the title and body nodes up front so a page that is not an article
# fails with a clear message before any folder is created.
title_node = soup.select_one('#activity-name')
content_node = soup.select_one('#js_content')
if title_node is None or content_node is None:
    raise SystemExit("未找到文章标题或正文，请确认地址是否为公众号推文。")

# Extract the title. get_text() also works when the heading wraps its text in
# nested tags (where .string would be None); split/join drops all whitespace.
title = "".join(title_node.get_text().split())
# The title doubles as a folder and file name, so replace every character that
# is illegal in a file name (not only '|') with '-'.
title = title.translate(str.maketrans({ch: '-' for ch in '\\/:*?"<>|'}))
if not title:
    # An empty name cannot be used as a folder; fall back to a placeholder.
    title = "未命名推文"
print(title)

# Create the output folder and its img subfolder. exist_ok lets the script be
# re-run for the same article without crashing on the existing folders.
os.makedirs(title + "/img", exist_ok=True)

# Extract the body text.
content = content_node.get_text()
print(content)

# Download every image that carries a data-src attribute.
imgs = soup.find_all("img")
for item in imgs:
    url = item.get('data-src')
    if not url:
        # No data-src on this tag: nothing to download.
        continue
    print(url)
    try:
        urllib_download(url, title)
    except Exception as exc:
        # Best effort: one broken image must not abort the export, but report
        # it instead of hiding it. Unlike a bare except, Ctrl+C still works.
        print("图片下载失败:", url, exc)
#保存word文档
from docx.shared import Inches,Pt
 
def chg_font(obj,fontname='微软雅黑',size=None):
    """Set the typeface, and optionally the size, of a python-docx object.

    Args:
        obj: Object exposing ``font`` and ``_element.rPr`` (the script passes
            ``doc.styles['Normal']``).
        fontname: Typeface name written to ``font.name`` and to the
            ``w:eastAsia`` attribute of the object's ``rFonts`` element.
        size: Optional font size. Any ``docx.shared.Length`` value (``Pt``,
            ``Inches``, ``Cm``, ...) is applied as given; a plain int or float
            is interpreted as points. ``None``, zero, or any other type
            leaves the size untouched.
    """
    # Local import keeps the function self-contained: Length is needed to
    # accept every length unit, not only Pt.
    from docx.shared import Length, Pt

    obj.font.name = fontname
    # Write the East Asian font attribute directly so CJK text uses the
    # requested typeface as well.
    obj._element.rPr.rFonts.set(qn('w:eastAsia'), fontname)

    if not size:
        return
    if isinstance(size, Length):
        obj.font.size = size
    elif isinstance(size, (int, float)) and not isinstance(size, bool):
        # A bare number would otherwise be dropped silently; treat it as points.
        obj.font.size = Pt(size)
 
# Apply the default font to the document's Normal style.
chg_font(doc.styles['Normal'], fontname='宋体')

# Title paragraph: the article title in bold, centred.
paragraph = doc.add_paragraph('')
run = paragraph.add_run(title)
run.bold = True
title_format = paragraph.paragraph_format
title_format.alignment = WD_ALIGN_PARAGRAPH.CENTER

# Body paragraph: first-line indent, 1.5 line spacing, justified.
paragraph2 = doc.add_paragraph(content)
body_format = paragraph2.paragraph_format
body_format.first_line_indent = Inches(0.3)
body_format.line_spacing = 1.5
body_format.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY

# Save the document inside the folder named after the title.
output_path = f"{title}/{title}.docx"
doc.save(output_path)